package cn.turboinfo.fuyang.api.domain.util;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.text.StringEscapeUtils;
import org.jsoup.Jsoup;
import org.jsoup.safety.Safelist;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

@Slf4j
public class HtmlHelper {

    // Regex sources, kept under their original names for any package-level readers.
    static final String regExScript = "<script[^>]*?>[\\s\\S]*?<\\/script>";
    static final String regExStyle = "<style[^>]*?>[\\s\\S]*?<\\/style>";
    static final String regExHtml = "<[^>]+>";

    // Compiled once: Pattern instances are immutable and safe to share across threads,
    // so there is no need to recompile them on every call.
    private static final Pattern SCRIPT_PATTERN = Pattern.compile(regExScript, Pattern.CASE_INSENSITIVE);
    private static final Pattern STYLE_PATTERN = Pattern.compile(regExStyle, Pattern.CASE_INSENSITIVE);
    private static final Pattern HTML_TAG_PATTERN = Pattern.compile(regExHtml);

    /**
     * Strips HTML markup from a string, leaving only its text content.
     *
     * <p>HTML entities (for example {@code &nbsp;}) are decoded first. After that,
     * {@code <script>} elements and {@code <style>} elements are removed together with
     * their content, and finally every remaining {@code <...>} tag is removed.
     *
     * <p>Because decoding happens before stripping, entity-encoded markup such as
     * {@code &lt;b&gt;} is decoded to a tag and removed as well.
     *
     * @param htmlStr the HTML text to strip; may be {@code null}
     * @return the text with entities decoded and tags removed, or {@code null} if
     *         {@code htmlStr} is {@code null}
     */
    public static String tireHtmlTag(String htmlStr) {
        if (htmlStr == null) {
            // Previously a null input ended in a NullPointerException from Pattern.matcher.
            return null;
        }

        // Decode HTML entities such as &nbsp;
        String text = StringEscapeUtils.unescapeHtml4(htmlStr);

        // Remove <script> elements, including their content
        text = SCRIPT_PATTERN.matcher(text).replaceAll("");

        // Remove <style> elements, including their content
        text = STYLE_PATTERN.matcher(text).replaceAll("");

        // Remove every remaining tag
        return HTML_TAG_PATTERN.matcher(text).replaceAll("");
    }

    /**
     * Sanitizes HTML with jsoup, keeping only markup permitted by the safelist.
     *
     * <p>The safelist is {@code Safelist.relaxed()} extended so that the {@code style},
     * {@code class} and {@code src} attributes are accepted on all tags. Unlike
     * {@link #tireHtmlTag(String)}, the input is not entity-decoded before cleaning.
     *
     * @param htmlStr the HTML text to sanitize; may be {@code null}
     * @return the sanitized HTML, or {@code null} if {@code htmlStr} is {@code null}
     */
    public static String safeFilter(String htmlStr) {
        if (htmlStr == null) {
            // Mirror tireHtmlTag: hand null back instead of passing it to Jsoup.clean.
            return null;
        }

        // NOTE(review): addProtocols is called without any protocol names, so it appears
        // to add nothing beyond what Safelist.relaxed() already allows for img/src.
        // Confirm whether extra protocols (e.g. "data") were intended here.
        Safelist safelist = Safelist.relaxed()
                .addAttributes(":all", "style", "class", "src")
                .addProtocols("img", "src");

        return Jsoup.clean(htmlStr, safelist);
    }
}
